# spatial libraries
library(tmap)
library(terra)
library(sf)
library(tidyverse)

1 Get the data

1.1 Basemaps

Latam.raster <- terra::rast('data/Latam_raster.tif')
Latam_countries <- terra::rast('data/Latam_raster_countries.tif')

1.2 Covariates

  • Elevation
  • Net Primary Productivity
  • Bio 4: Temperature Seasonality
  • Bio 7: Temperature Annual Range
  • Bio 10: Mean Temperature of Warmest Quarter
  • Bio 13: Precipitation of Wettest Month
  • Bio 15: Precipitation Seasonality (Coefficient of Variation)

We area going to assume climate is constant and use WorldClim data. We could also use TerraClimate (climateR package). Later, we will consider CHELSEA (1901-2016) and make a cutoff at 1996.

1.2.1 Alternative for all variables

# for specifications on how these data were prepared see covariates_and_thinning_parameters.Rmd
elev <-  rast('big_data/elev.tif')
bio <- rast('big_data/bio.tif')

npp_pre <- terra::rast('big_data/npp_pre.tif')
npp_pos <- terra::rast('big_data/npp_pos.tif')
npp <- mean(c(npp_pre, npp_pos), na.rm=T)
rm(npp_pre, npp_pos)
npp <- resample(npp, elev, method='bilinear')
names(npp) <- 'npp'

tree_pre <- terra::rast('big_data/veg_pre.tif')
tree_pos <- terra::rast('big_data/veg_pos.tif')
tree <- mean(c(tree_pre, tree_pos), na.rm=T)
rm(tree_pre, tree_pos)
tree <- resample(tree, elev, method='bilinear')
names(tree) <- 'tree'

nontree_pre <- terra::rast('big_data/nontree_pre.tif')
nontree_pos <- terra::rast('big_data/nontree_pos.tif')
nontree <- mean(c(nontree_pre, nontree_pos), na.rm=T)
rm(nontree_pre, nontree_pos) # to free memory
nontree <- resample(nontree, elev, method='bilinear')
names(nontree) <- 'nontree'

# environmental layers for PA
env <- c(elev, bio, npp, tree, nontree) %>% 
  classify(cbind(NaN, NA))

# rm(elev, bio) # to free memory
# file.remove(terra::sources(npp), terra::sources(tree), terra::sources(nontree))
# terra::writeRaster(env, 'big_data/env.tif', overwrite=T)
#tmpFiles(remove=T) # to free memory

land_pre <- terra::rast('big_data/land_pre.tif')
land_pos <- terra::rast('big_data/land_pos.tif')
land <- modal(c(land_pre, land_pos), na.rm=T)
rm(land_pre, land_pos)
land <- resample(land, elev, method='near')
names(land) <- 'land'
rm(land_pre, land_pos)
# terra::writeRaster(land, 'big_data/land.tif', overwrite=T)
env <- terra::rast('big_data/env.tif')
land <- terra::rast('big_data/land.tif')

# resample and scaled values for PO
env_100k_resampled <- terra::resample(env, Latam.raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% scale() # scaled values to 0 mean and sd of 1

land_100k_resampled <- terra::resample(land, Latam.raster, method='near') %>% 
  classify(cbind(NaN, NA)) 

1.3 Thinning parametres

  • Accessibility
  • Country of origin (GBIF)
# for specifications on how these data were prepared see covariates_and_thinning_parameters.Rmd
acce <- terra::rast('big_data/acce.tif')
acce_100k <- terra::resample(x= acce, y = Latam.raster, method = 'bilinear') %>% 
  classify(cbind(NaN, NA)) 

# scaled values
acce_100k_resampled <- acce_100k / max(acce_100k[], na.rm=T)

2 Example case

The data used as an example will be for Herpailurus yagouaroundi.

2.1 Presence-only data

# for specifications on how these data were prepared see biodiversity_data_preparation.Rmd
PO_herpailurus_pre <- terra::rast('data/PO_herpailurus_pre_raster.tif') 
PO_herpailurus_pos <- terra::rast('data/PO_herpailurus_pos_raster.tif')

# calculate area in each raster cell
PO_herpailurus.area <- terra::cellSize(PO_herpailurus_pre[[1]], unit='m', transform=F) %>% 
  classify(cbind(NaN, NA)) %>% values()

# calculate coordinates for each raster cell
PO_herpailurus.coords <- terra::xyFromCell(PO_herpailurus_pre, cell=1:ncell(PO_herpailurus_pre)) %>% as.data.frame()
names(PO_herpailurus.coords) <- c('X', 'Y')

# number of cells in the rasters (both the same)
PO_herpailurus.cell <- 1:ncell(PO_herpailurus_pre)

# with the environmental variables values for each cell
PO_herpailurus.env <- env_100k_resampled[] # scaled values
PO_herpailurus.land <- land_100k_resampled[] # scaled values

# with the mean accessibility value for each cell
PO_herpailurus.acce <- acce_100k_resampled # scaled values

# calculate point count in each raster cell
PO_herpailurus_pre.count <- PO_herpailurus_pre %>%
    classify(cbind(NaN, NA)) %>% values %>% as_tibble %>% 
  dplyr::select(count)

PO_herpailurus_pos.count <- PO_herpailurus_pos %>%
    classify(cbind(NaN, NA)) %>% values %>% as_tibble %>% 
  dplyr::select(count)

PO_herpailurus.countries <- Latam_countries %>% classify(cbind(NaN, NA))

# save pre and pos datasets 
PO_herpailurus_pre.model.data <- data.frame(PO_herpailurus.coords,
                                            pixel = PO_herpailurus.cell[],
                                            area = PO_herpailurus.area,
                                            pt.count = PO_herpailurus_pre.count,
                                            env = cbind(PO_herpailurus.env, 
                                                        land=PO_herpailurus.land),
                                            acce = PO_herpailurus.acce[], 
                                            country = PO_herpailurus.countries[])

PO_herpailurus_pos.model.data <- data.frame(PO_herpailurus.coords,
                                            pixel = PO_herpailurus.cell[],
                                            area = PO_herpailurus.area,
                                            pt.count = PO_herpailurus_pos.count,
                                            env = cbind(PO_herpailurus.env, 
                                                        land=PO_herpailurus.land),
                                            acce = PO_herpailurus.acce[], 
                                            country = PO_herpailurus.countries[])

2.2 Presence-absence data

# for specifications on how these data were prepared see biodiversity_data_preparation.Rmd
PA_herpailurus_pre <- readRDS('data/PA_herpailurus_pre_blobs.Rds')
PA_herpailurus_pos <- readRDS('data/PA_herpailurus_pos_blobs.Rds')

# calculate area, coordinates, and point count in each raster cell
PA_herpailurus.area.pre <- as.numeric(PA_herpailurus_pre$blobArea) 
PA_herpailurus.area.pos <- as.numeric(PA_herpailurus_pos$blobArea) 

PA_herpailurus.coords.pre <- st_coordinates(st_centroid(PA_herpailurus_pre)) %>% as_tibble()
PA_herpailurus.coords.pos <- st_coordinates(st_centroid(PA_herpailurus_pos)) %>% as_tibble()

PA_herpailurus.pixel.pre <- terra::cells(PO_herpailurus_pre, vect(st_centroid(PA_herpailurus_pre))) 
PA_herpailurus.pixel.pos <- terra::cells(PO_herpailurus_pos, vect(st_centroid(PA_herpailurus_pos))) 

# mode to calculate the most frequent value for the blob - this needs to be changed to getting the %are for each landcover
mode_raster  <- function(x, na.rm = FALSE) {
  if(na.rm){ x = x[!is.na(x)]}
  ux <- unique(x)
  return(ux[which.max(tabulate(match(x, ux)))])
}

PA_herpailurus.env.pre <- terra::extract(x = env, y = vect(PA_herpailurus_pre), fun = mean, rm.na=T) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_herpailurus.land.pre <- terra::extract(x = land, y = vect(PA_herpailurus_pre), fun = mode_raster, na.rm=TRUE)

PA_herpailurus.env.pos <- terra::extract(x = env, y = vect(PA_herpailurus_pos), fun = mean, rm.na=T) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_herpailurus.land.pos <- terra::extract(x = land, y = vect(PA_herpailurus_pos), fun = mode_raster, na.rm=TRUE)


PA_herpailurus_pre.model.data <- data.frame(pixel = PA_herpailurus.pixel.pre, 
                                            PA_herpailurus.coords.pre,
                                            area = PA_herpailurus.area.pre,
                                            presabs = PA_herpailurus_pre$presence,
                                            effort = PA_herpailurus_pre$effort,
                                            env = cbind(PA_herpailurus.env.pre[,2:24], 
                                                        land=PA_herpailurus.land.pre[,2]))

PA_herpailurus_pos.model.data <- data.frame(pixel = PA_herpailurus.pixel.pos, 
                                            PA_herpailurus.coords.pos,
                                            area = PA_herpailurus.area.pos,
                                            presabs = PA_herpailurus_pos$presence,
                                            effort = PA_herpailurus_pos$effort,
                                            env = cbind(PA_herpailurus.env.pos[,2:24], 
                                                        land=PA_herpailurus.land.pos[,2]))

2.2.1 Plots

tmap::tmap_mode(mode ='view')

tmap::tm_shape(PA_herpailurus_pre) +
  tmap::tm_fill(col = 'presence', n=2, 
                palette = 'Spectral', syle='pretty', legend.show = F)
tmap::tm_shape(PA_herpailurus_pos) +
  tmap::tm_fill(col = 'presence', n=2, 
                palette = 'Spectral', syle='pretty', legend.show = F) 

2.2.2 Datasets in the covariate space

The range (and interquartile range) of values for each covariate that is sampled by each dataset

getRangeIQR <- function(env_PX){
  env_PX.df <- tibble(env= character(),
                          range = numeric(),
                     iqr = numeric(),
                     stringsAsFactors=FALSE)
  
  for (i in 1:ncol(env_PX)){
    df <- tibble(env = names(env_PX[i]),
             range = range(env_PX[[i]], na.rm=T)[2],
             iqr=  IQR(env_PA[[i]], na.rm = T))
    env_PX.df <- rbind(env_PX.df, df)
  }
  return(env_PX.df)
}

# covariates for PA
env_PA <- terra::extract(x = env, 
                         y = rbind(vect(PA_herpailurus_pre), 
                                   vect(PA_herpailurus_pos)), #st_join with pos
                                     fun = mean, rm.na=T) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) 

summary(env_PA[-1])
##       elev              bio_1            bio_2            bio_3        
##  Min.   :   3.979   Min.   : 3.167   Min.   : 4.481   Min.   : 0.1679  
##  1st Qu.: 213.257   1st Qu.:19.585   1st Qu.:22.224   1st Qu.:16.5066  
##  Median : 497.230   Median :21.962   Median :24.043   Median :19.1422  
##  Mean   : 594.806   Mean   :21.939   Mean   :23.919   Mean   :19.4950  
##  3rd Qu.: 838.588   3rd Qu.:25.277   3rd Qu.:26.461   3rd Qu.:23.5156  
##  Max.   :4867.706   Max.   :27.973   Max.   :29.357   Max.   :27.4998  
##  NA's   :15         NA's   :12       NA's   :12       NA's   :12       
##      bio_4             bio_5             bio_6             bio_7        
##  Min.   :   5.19   Min.   :  2.509   Min.   :  0.000   Min.   :  6.456  
##  1st Qu.:1261.63   1st Qu.:197.883   1st Qu.:  9.369   1st Qu.: 49.491  
##  Median :1454.95   Median :247.154   Median : 23.739   Median : 67.208  
##  Mean   :1523.03   Mean   :246.075   Mean   : 38.365   Mean   : 62.919  
##  3rd Qu.:1690.88   3rd Qu.:290.276   3rd Qu.: 49.131   3rd Qu.: 78.410  
##  Max.   :3949.15   Max.   :525.345   Max.   :242.290   Max.   :170.527  
##  NA's   :12        NA's   :12        NA's   :12        NA's   :12       
##      bio_8              bio_9            bio_10             bio_11       
##  Min.   :   5.066   Min.   :  0.00   Min.   :   5.066   Min.   :   0.00  
##  1st Qu.: 534.711   1st Qu.: 38.52   1st Qu.: 370.655   1st Qu.:  60.57  
##  Median : 682.073   Median : 86.09   Median : 467.339   Median : 108.76  
##  Mean   : 674.292   Mean   :132.18   Mean   : 476.197   Mean   : 216.19  
##  3rd Qu.: 786.366   3rd Qu.:178.95   3rd Qu.: 595.545   3rd Qu.: 275.69  
##  Max.   :1422.255   Max.   :745.39   Max.   :1039.348   Max.   :1229.41  
##  NA's   :12         NA's   :12       NA's   :12         NA's   :12       
##      bio_12           bio_13          bio_14           bio_15     
##  Min.   : 5.858   Min.   :41.34   Min.   : 25.45   Min.   :12.07  
##  1st Qu.:10.000   1st Qu.:62.33   1st Qu.:109.72   1st Qu.:28.14  
##  Median :11.528   Median :66.51   Median :178.91   Median :29.85  
##  Mean   :11.123   Mean   :66.48   Mean   :186.23   Mean   :29.82  
##  3rd Qu.:12.223   3rd Qu.:70.18   3rd Qu.:241.28   3rd Qu.:32.21  
##  Max.   :17.895   Max.   :89.98   Max.   :618.79   Max.   :37.80  
##  NA's   :12       NA's   :12      NA's   :12       NA's   :12     
##      bio_16           bio_17           bio_18            bio_19       
##  Min.   :-9.938   Min.   : 8.875   Min.   : 0.1679   Min.   : 0.8405  
##  1st Qu.: 9.575   1st Qu.:15.849   1st Qu.:21.8649   1st Qu.:17.0033  
##  Median :11.590   Median :17.577   Median :23.6041   Median :19.5749  
##  Mean   :12.737   Mean   :17.080   Mean   :23.0882   Mean   :20.1615  
##  3rd Qu.:16.391   3rd Qu.:18.493   3rd Qu.:25.8062   3rd Qu.:23.6865  
##  Max.   :22.600   Max.   :32.448   Max.   :28.7550   Max.   :29.0108  
##  NA's   :12       NA's   :12       NA's   :12        NA's   :12       
##       npp               tree             nontree       
##  Min.   :  963.6   Min.   :  0.1802   Min.   :  2.503  
##  1st Qu.: 8491.3   1st Qu.: 19.0063   1st Qu.: 37.739  
##  Median :11076.2   Median : 35.1684   Median : 58.548  
##  Mean   :12093.6   Mean   : 42.6943   Mean   : 56.163  
##  3rd Qu.:14798.1   3rd Qu.: 62.0437   3rd Qu.: 69.688  
##  Max.   :32766.0   Max.   :200.0000   Max.   :200.000  
##  NA's   :12        NA's   :14         NA's   :14
env_PA.RangeIQR <- getRangeIQR(env_PA[-1]) %>% mutate(data='PA')

for (i in 2:ncol(env_PA)){
    range <- range(env_PA[[i]], na.rm=T)[2]
    iqr <- IQR(env_PA[[i]], na.rm = T)
    print(str_glue('PA {names(env_PA[i])} -> range: {range} - IQR: {iqr}'))
}
## PA elev -> range: 4867.70556640625 - IQR: 625.331535738651
## PA bio_1 -> range: 27.9727489471436 - IQR: 5.69221101138655
## PA bio_2 -> range: 29.3572416595288 - IQR: 4.23737098316458
## PA bio_3 -> range: 27.4998134613037 - IQR: 7.0090252084622
## PA bio_4 -> range: 3949.14760902907 - IQR: 429.250822023361
## PA bio_5 -> range: 525.345114677183 - IQR: 92.3933596634924
## PA bio_6 -> range: 242.289643218898 - IQR: 39.7617290686917
## PA bio_7 -> range: 170.527460734049 - IQR: 28.9187885995615
## PA bio_8 -> range: 1422.25450478831 - IQR: 251.655401872575
## PA bio_9 -> range: 745.38912810801 - IQR: 140.432724756921
## PA bio_10 -> range: 1039.34758506756 - IQR: 224.890686035156
## PA bio_11 -> range: 1229.4060016273 - IQR: 215.120079503523
## PA bio_12 -> range: 17.8947978019714 - IQR: 2.22287131164246
## PA bio_13 -> range: 89.9765835587833 - IQR: 7.8512016351787
## PA bio_14 -> range: 618.79254805093 - IQR: 131.563156651565
## PA bio_15 -> range: 37.8010549545288 - IQR: 4.07810409150956
## PA bio_16 -> range: 22.6000003814697 - IQR: 6.81551563729807
## PA bio_17 -> range: 32.4479134503533 - IQR: 2.6439735514777
## PA bio_18 -> range: 28.7550323148608 - IQR: 3.94133305061106
## PA bio_19 -> range: 29.01082732813 - IQR: 6.68317446346737
## PA npp -> range: 32766 - IQR: 6306.80547018612
## PA tree -> range: 200 - IQR: 43.0374732644934
## PA nontree -> range: 200 - IQR: 31.9485002774701
# covariates layers for PO
env_PO <- terra::resample(env, Latam.raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% as.data.frame()

summary(env_PO)
##       elev              bio_1            bio_2            bio_3       
##  Min.   :   2.195   Min.   : 2.118   Min.   : 5.522   Min.   :-2.701  
##  1st Qu.: 140.286   1st Qu.:18.307   1st Qu.:23.188   1st Qu.:12.658  
##  Median : 295.080   Median :23.476   Median :25.845   Median :21.209  
##  Mean   : 604.340   Mean   :21.401   Mean   :24.127   Mean   :18.395  
##  3rd Qu.: 692.138   3rd Qu.:25.739   3rd Qu.:26.837   3rd Qu.:24.919  
##  Max.   :4462.538   Max.   :27.797   Max.   :32.532   Max.   :27.235  
##      bio_4              bio_5             bio_6             bio_7        
##  Min.   :   2.301   Min.   :  1.181   Min.   :  0.000   Min.   :  6.735  
##  1st Qu.: 798.772   1st Qu.:132.409   1st Qu.:  7.267   1st Qu.: 41.522  
##  Median :1426.588   Median :227.126   Median : 21.731   Median : 60.359  
##  Mean   :1453.968   Mean   :223.743   Mean   : 39.544   Mean   : 60.428  
##  3rd Qu.:2026.749   3rd Qu.:315.360   3rd Qu.: 56.956   3rd Qu.: 78.513  
##  Max.   :6400.930   Max.   :720.784   Max.   :340.600   Max.   :154.105  
##      bio_8              bio_9             bio_10              bio_11         
##  Min.   :   2.164   Min.   :   0.00   Min.   :   0.1098   Min.   :   0.0183  
##  1st Qu.: 354.738   1st Qu.:  31.06   1st Qu.: 205.4128   1st Qu.:  55.8742  
##  Median : 608.297   Median :  86.84   Median : 332.9522   Median : 156.2056  
##  Mean   : 609.528   Mean   : 141.67   Mean   : 351.8857   Mean   : 312.3358  
##  3rd Qu.: 876.057   3rd Qu.: 210.38   3rd Qu.: 479.7126   3rd Qu.: 481.7002  
##  Max.   :1997.032   Max.   :1207.55   Max.   :1463.8673   Max.   :1820.8959  
##      bio_12           bio_13          bio_14           bio_15     
##  Min.   : 4.825   Min.   :35.58   Min.   : 24.21   Min.   :10.44  
##  1st Qu.: 9.760   1st Qu.:54.03   1st Qu.: 58.88   1st Qu.:29.95  
##  Median :11.468   Median :68.78   Median :147.11   Median :31.86  
##  Mean   :11.683   Mean   :66.56   Mean   :235.74   Mean   :30.67  
##  3rd Qu.:13.071   3rd Qu.:77.88   3rd Qu.:381.96   3rd Qu.:33.26  
##  Max.   :19.223   Max.   :91.20   Max.   :860.98   Max.   :41.97  
##      bio_16           bio_17           bio_18            bio_19       
##  Min.   :-9.787   Min.   : 8.357   Min.   : 0.7795   Min.   : 0.8727  
##  1st Qu.: 5.157   1st Qu.:12.707   1st Qu.:21.9295   1st Qu.:16.0399  
##  Median :14.086   Median :17.432   Median :24.8522   Median :22.3401  
##  Mean   :11.864   Mean   :18.803   Mean   :22.4651   Mean   :20.1846  
##  3rd Qu.:18.967   3rd Qu.:23.764   3rd Qu.:25.9559   3rd Qu.:25.5350  
##  Max.   :22.770   Max.   :39.095   Max.   :31.3904   Max.   :28.7862  
##       npp             tree             nontree       
##  Min.   : 1117   Min.   :  0.1373   Min.   :  6.137  
##  1st Qu.: 6820   1st Qu.: 14.6802   1st Qu.: 32.586  
##  Median : 9939   Median : 29.6195   Median : 53.025  
##  Mean   : 9768   Mean   : 35.7304   Mean   : 49.931  
##  3rd Qu.:12219   3rd Qu.: 57.3228   3rd Qu.: 66.424  
##  Max.   :32735   Max.   :112.0247   Max.   :124.779
env_PO.RangeIQR <- getRangeIQR(env_PO) %>% mutate(data='PO')

for (i in 1:ncol(env_PO)){
    range <- range(env_PO[[i]], na.rm=T)[2]
    iqr <- IQR(env_PO[[i]], na.rm = T)
    print(str_glue('PO {names(env_PO[i])} -> range: {range} - IQR: {iqr}'))
}
## PO elev -> range: 4462.5380859375 - IQR: 551.851234436035
## PO bio_1 -> range: 27.7970905303955 - IQR: 7.43241691589355
## PO bio_2 -> range: 32.5316772460938 - IQR: 3.6493501663208
## PO bio_3 -> range: 27.2350292205811 - IQR: 12.2605299949646
## PO bio_4 -> range: 6400.93017578125 - IQR: 1227.9772644043
## PO bio_5 -> range: 720.784484863281 - IQR: 182.950798034668
## PO bio_6 -> range: 340.600128173828 - IQR: 49.6890859603882
## PO bio_7 -> range: 154.105056762695 - IQR: 36.9904708862305
## PO bio_8 -> range: 1997.0322265625 - IQR: 521.318466186523
## PO bio_9 -> range: 1207.54870605469 - IQR: 179.325882911682
## PO bio_10 -> range: 1463.86730957031 - IQR: 274.299758911133
## PO bio_11 -> range: 1820.89587402344 - IQR: 425.825933456421
## PO bio_12 -> range: 19.2225551605225 - IQR: 3.31159162521362
## PO bio_13 -> range: 91.2010192871094 - IQR: 23.8534049987793
## PO bio_14 -> range: 860.983764648438 - IQR: 323.077680587769
## PO bio_15 -> range: 41.9655418395996 - IQR: 3.31284523010254
## PO bio_16 -> range: 22.7698287963867 - IQR: 13.8097748756409
## PO bio_17 -> range: 39.0950622558594 - IQR: 11.0565657615662
## PO bio_18 -> range: 31.3903827667236 - IQR: 4.02646827697754
## PO bio_19 -> range: 28.7862033843994 - IQR: 9.4951229095459
## PO npp -> range: 32735.357421875 - IQR: 5398.97485351562
## PO tree -> range: 112.024726867676 - IQR: 42.6425590515137
## PO nontree -> range: 124.77921295166 - IQR: 33.8377475738525

2.3 Save the data

write_csv(left_join(env_PA.RangeIQR, env_PO.RangeIQR, by='env'), file='docs/range_and_IQR.csv')

saveRDS(PO_herpailurus_pre.model.data, 'data/PO_pre.rds') 
saveRDS(PO_herpailurus_pos.model.data, 'data/PO_post.rds') 

saveRDS(PA_herpailurus_pre.model.data, 'data/PA_pre.rds')
saveRDS(PA_herpailurus_pos.model.data, 'data/PA_post.rds')